clear

// Build a GDP-deflator index for Canada from an annual inflation-rate series.
// NOTE(review): the CPI section that follows saves to the same
// "$gendatadir/canadadeflator.dta" path with -replace-, so the file written here
// is overwritten there -- confirm which deflator is meant to be used downstream.
insheet using "$sourcedatadir/ca/canadainflation.csv", names
// For clarity copy nygdpdeflkdzg (which is the World Bank code) to a var named inflation.
gen inflation = nygdpdeflkdzg
// The chained index below is built row by row from the previous row, so put the
// rows in chronological order explicitly instead of relying on the CSV row order.
sort year
// Convert the annual inflation rate series into a GDP deflator series: the first
// row is set to 1 and every later row is the previous row's index times
// (1 + inflation/100). -replace- works through the observations in order, so
// gdpdeflator[_n-1] already holds the updated value of the previous row.
gen gdpdeflator = 1 if _n == 1
replace gdpdeflator = gdpdeflator[_n-1] * (1 + (inflation/100)) if _n > 1
// Rescale so that the index equals 100 in 2011 instead of 1 in the first year of
// the series. (An earlier note here said 2010; the code selects 2011.)
// NOTE(review): presumably 2011 is the base year of the already-deflated income
// data -- verify.
gen rescale = gdpdeflator if year == 2011
egen rescale2 = max(rescale)
replace gdpdeflator = 100*gdpdeflator/rescale2
drop rescale* source inflation nygdpdeflkdzg
save "$gendatadir/canadadeflator.dta", replace

clear
// Price index from CANSIM Table 326-0021, rebased so that 2011 = 100.
insheet using "$sourcedatadir/ca/cansim Table 326-0021.csv"
// Only the all-items series is kept.
keep if comm == "All-items"
*keep if comm == "All-items excluding energy"
rename value    cpideflator
rename geo      country
rename ref_date year
// Spread the 2011 value to every row and use it as the new base.
generate rescale_2011 = cpideflator if year == 2011
egen     rescale_base = max(rescale_2011)
replace  cpideflator  = 100*cpideflator/rescale_base
drop comm rescale*
// The deflation step later in this file reads the index under the name gdpdeflator.
generate gdpdeflator = cpideflator
// Same output path as the GDP-deflator section above, so this file replaces that one.
save "$gendatadir/canadadeflator.dta", replace

// Incumbents table: import, order by election_year (the key used when it is
// merged further down) and store as a .dta file.
insheet using "$sourcedatadir/ca/cdn incumbents.csv", clear
sort election_year
save "$gendatadir/cdn incumbents.dta", replace

clear
*insheet using "cansim Table 204-0001 market_top_incomes long.csv"
// CANSIM Table 204-0001: recode the long-format rows into short codes and reshape
// to one row per year with one inc_<group>_<statistic>_<concept> variable per series.
insheet using "$sourcedatadir/ca/cansim Table 204-0001_20150803.csv"
rename ref_date year
drop geo

// ".." entries are blanked so that value can be converted to a numeric variable.
replace value = "" if value == ".."
destring value, replace
rename value inc_

// Short code for the statistic.
generate stat_code = ""
replace  stat_code = "mean"   if stat == "Average income (current dollars)"
replace  stat_code = "median" if stat == "Median income (current dollars)"
replace  stat_code = "pcs5y"  if stat == "Percentage in the same quantile five years ago"
replace  stat_code = "pcs1y"  if stat == "Percentage in the same quantile last year"
replace  stat_code = "pct55y" if stat == "Percentage in top 5 percentiles five years ago"
replace  stat_code = "pct51y" if stat == "Percentage in top 5 percentiles last year"
replace  stat_code = "share"  if stat == "Share of income"
replace  stat_code = "thresh" if stat == "Threshold value (current dollars)"

// Short code for the income concept ("cg" suffix = with capital gains).
generate conc_code = ""
replace  conc_code = "at"   if concept == "After tax income"
replace  conc_code = "atcg" if concept == "After tax income with capital gains"
replace  conc_code = "mi"   if concept == "Market income"
replace  conc_code = "micg" if concept == "Market income with capital gains"
replace  conc_code = "ti"   if concept == "Total income"
replace  conc_code = "ticg" if concept == "Total income with capital gains"

// Short code for the income group.
generate group_code = ""
replace  group_code = "b90" if group == "Bottom 90 percent income group"
replace  group_code = "b95" if group == "Bottom 95 percent income group"
replace  group_code = "b99" if group == "Bottom 99 percent income group"
replace  group_code = "t01" if group == "Top 0.1 percent income group"
replace  group_code = "t1"  if group == "Top 1 percent income group"
replace  group_code = "t10" if group == "Top 10 percent income group"
replace  group_code = "t5"  if group == "Top 5 percent income group"

*encode group_code, gen(groupcode)
*encode conc_code, gen(conccode)
*encode stat_code, gen(statcode)

// Variable-name suffix used by the reshape: <group>_<statistic>_<concept>.
egen vcode = concat(group_code stat_code conc_code), punct("_")
drop concept group stat *_code

reshape wide inc_, i(year) j(vcode) string
sort year
*save "cansim Table 204-0001 market_top_incomes long.dta", replace
save "$gendatadir/cansim Table 204-0001.dta", replace

// Pre-tax quintile table for 1965-1975 (per the file name): import, sort by year, store.
// The only merge of this .dta visible in this file is commented out.
insheet using "$sourcedatadir/ca/canadaincomes_pretax_19651975_quintiles.csv", clear
sort year
save "$gendatadir/canadaincomes_pretax_19651975_quintiles.dta", replace

*clear
**insheet using "cansim Table 202-0202 pretax average long.csv"
*insheet using "cansim Table 202-0202_20150803.csv"
*drop geo familytype
*rename ref_date year
*rename value inc_mi_mean_sc
*label var inc_mi_mean_sc "Already in constant dollars"
*sort year
**save "cansim Table 202-0202 pretax average long.dta", replace
*save "cansim Table 202-0202.dta", replace


clear
*insheet using "cansim Table 202-0604 posttax quintiles long.csv", names
*insheet using "cansim Table 202-0701 incomes long.csv", names
// Already constant dollars!
// CANSIM Table 202-0701: recode to short codes and reshape to one row per year
// with one inc_<concept>_<statistic>_<quintile> variable per series.
insheet using "$sourcedatadir/ca/cansim Table 202-0701_20150803.csv", names
drop province universes
rename ref_date year
rename value    inc_

// The all-quintiles total rows are not used.
drop if quintile == "Total of quintiles"

// Quintile number, 1 (lowest) to 5 (highest).
generate quintile_code = .
replace  quintile_code = 1 if quintile == "Lowest quintile"
replace  quintile_code = 2 if quintile == "Second quintile"
replace  quintile_code = 3 if quintile == "Third quintile"
replace  quintile_code = 4 if quintile == "Fourth quintile"
replace  quintile_code = 5 if quintile == "Highest quintile"

// Short code for the statistic.
generate statistics_code = ""
replace  statistics_code = "mean"  if statistics == "Average income (Dollars)"
replace  statistics_code = "share" if statistics == "Share of income (Percent)"

// Short code for the income concept.
generate incconc_code = ""
replace  incconc_code = "at" if incomeconcept == "After-tax income"
replace  incconc_code = "mi" if incomeconcept == "Market income"
replace  incconc_code = "ti" if incomeconcept == "Total income"

// Variable-name suffix used by the reshape: <concept>_<statistic>_<quintile>.
egen vcode = concat(incconc_code statistics_code quintile_code), punct("_")
drop incomeconcept statistics quintile *_code
reshape wide inc_, i(year) j(vcode) string

// Drop currently superfluous data
drop *share*

// Merge three processed income-distribution files onto the yearly data.
// Each file supplies the same *_yb variable names, so they are renamed to a
// source-specific prefix right after each merge, before the next file is merged.

// This is based on data from Table 202-0601, then processed through our algorithm (in R code)
merge 1:1 year using "$gendatadir/canadaincomes_19762011_processed.dta"
foreach quin in 1 2 3 4 {
  rename inc_ul_quin`quin'_yb inc_dist_quin`quin'_ul
}
rename inc_ll_95_yb inc_dist_ll_95
rename inc_mean_yb  inc_dist_mean
rename inc_50_yb    inc_dist_50
drop _merge

// This is as above, but using a fitted log-normal distribution to extract quantile values, instead.
// Every variable from this file is labelled "fdist".
merge 1:1 year using "$gendatadir/canadaincomes_19762011_fdist_processed.dta"
foreach quin in 1 2 3 4 {
  rename inc_ul_quin`quin'_yb inc_fdist_quin`quin'_ul
  label var inc_fdist_quin`quin'_ul "fdist"
}
rename inc_ll_95_yb inc_fdist_ll_95
label var inc_fdist_ll_95 "fdist"
rename inc_mean_yb  inc_fdist_mean
label var inc_fdist_mean "fdist"
rename inc_50_yb    inc_fdist_50
// Previously the only fdist variable left unlabelled; labelled for consistency.
label var inc_fdist_50 "fdist"
drop _merge


// Processed series covering 1963-1975 (per the file name), stored under the
// inc_distyb_at_ prefix.
merge 1:1 year using "$gendatadir/canadaincomes_19631975_processed.dta"
foreach quin in 1 2 3 4 {
  rename inc_ul_quin`quin'_yb inc_distyb_at_quin`quin'
}
rename inc_ll_95_yb inc_distyb_at_ll_95
rename inc_mean_yb  inc_distyb_at_mean
rename inc_50_yb    inc_distyb_at_50
**merge 1:1 year using "$gendatadir/canadaincomes_pretax_19651975_quintiles.dta"
*rename inc_ll_95_yb inc_distyb_at_ll_95
**rename inc_mean_yb  inc_distyb_at_mean
**rename inc_median_yb    inc_distyb_at_50
**rename inc_ul_quin1_yb inc_distyb_at_quin1
**rename inc_ul_quin2_yb inc_distyb_at_quin2
**rename inc_ul_quin3_yb inc_distyb_at_quin3
**rename inc_ul_quin4_yb inc_distyb_at_quin4
drop _merge

*merge 1:1 year using "canadaincomes_1963_processed.dta"
*drop _merge

*merge 1:1 year using "cansim Table 202-0202 pretax average long.dta"
*merge 1:1 year using "cansim Table 202-0202.dta"
*drop _merge

*merge 1:1 year using "cansim Table 204-0001 market_top_incomes long.dta"
// Add the Table 204-0001 series and the price deflator, both saved earlier in
// this file. nogenerate is used so that no _merge variable has to be dropped.
merge 1:1 year using "$gendatadir/cansim Table 204-0001.dta", nogenerate
merge 1:1 year using "$gendatadir/canadadeflator.dta", nogenerate

// Declare the data as a yearly time series (needed for the l. operator later on).
sort year
tsset year


// Fill missing values of every inc_distyb_* series with ipolate's linear
// interpolation over year; values that are already present are left untouched.
foreach series of varlist inc_distyb_* {
  ipolate `series' year, generate(distyb_interp)
  replace `series' = distyb_interp if `series' == .
  drop distyb_interp
}
*ipolate inc_ul_quin2_yb year if year < 1976, generate(inc_ul_quin2_yb_i)
*ipolate inc_ul_quin3_yb year if year < 1976, generate(inc_ul_quin3_yb_i)
*ipolate inc_ul_quin4_yb year if year < 1976, generate(inc_ul_quin4_yb_i)
*ipolate inc_ll_95_yb year if year < 1976, generate(inc_ll_95_yb_i)
*ipolate inc_mean_yb year if year < 1976, generate(inc_mean_yb_i)
*ipolate inc_50_yb year if year < 1976, generate(inc_50_yb_i)
*
*replace inc_ul_quin1_yb = inc_ul_quin1_yb_i if inc_ul_quin1_yb == .
*replace inc_ul_quin2_yb = inc_ul_quin2_yb_i if inc_ul_quin2_yb == .
*replace inc_ul_quin3_yb = inc_ul_quin3_yb_i if inc_ul_quin3_yb == .
*replace inc_ul_quin4_yb = inc_ul_quin4_yb_i if inc_ul_quin4_yb == .
*replace inc_ll_95_yb = inc_ll_95_yb_i if inc_ll_95_yb == .
*replace inc_mean_yb = inc_mean_yb_i if inc_mean_yb == .
*replace inc_50_yb = inc_50_yb_i if inc_50_yb == .
*
*drop inc_ul_quin1_yb_i inc_ul_quin2_yb_i inc_ul_quin3_yb_i inc_ul_quin4_yb_i inc_ll_95_yb_i inc_mean_yb_i inc_50_yb_i

// Simple average of the five quintile means, for each income concept.
foreach conc in "mi" "at" "ti" {
  gen inc_`conc'_mean_sc = (inc_`conc'_mean_1 + inc_`conc'_mean_2 + inc_`conc'_mean_3 ///
    + inc_`conc'_mean_4 + inc_`conc'_mean_5)/5
}


// This is the right thing to do, but it seems to lead to an implausible break in the two series.
// There seems to be a different GDP deflator at work here, making the series incomparable between each other.
// Luckily, we have no elections that effectively straddle the two series, so we can use them separately.
*replace inc_ul_quin1_yb = inc_ul_quin1_yb/(gdpdeflator/100) if year < 1976
*replace inc_ul_quin2_yb = inc_ul_quin2_yb/(gdpdeflator/100) if year < 1976
*replace inc_ul_quin3_yb = inc_ul_quin3_yb/(gdpdeflator/100) if year < 1976
*replace inc_ul_quin4_yb = inc_ul_quin4_yb/(gdpdeflator/100) if year < 1976
*replace inc_ll_95_yb    = inc_ll_95_yb/(gdpdeflator/100) if year < 1976
*replace inc_mean_yb     = inc_mean_yb/(gdpdeflator/100) if year < 1976
*replace inc_50_yb       = inc_50_yb/(gdpdeflator/100) if year < 1976

*replace inc_ll_95_sc    = inc_ll_95_sc/(gdpdeflator/100)
*foreach varn of varlist *thresh* inc_t*_mean_* {
*  replace `varn' = `varn'/(gdpdeflator/100)
*}


// Deflate the inc_distyb_* series and the top-5% mean and threshold series:
// each value is divided by the deflator expressed relative to its base of 100.
foreach series of varlist inc_distyb_* inc_t5_mean_* inc_t5_thresh_* {
  replace `series' = `series'/(gdpdeflator/100)
}

// The incumbents file is keyed on election_year, so year is renamed for the
// merge and renamed back afterwards.
rename year election_year
merge 1:1 election_year using "$gendatadir/cdn incumbents.dta", nogenerate
rename election_year year

// Store the combined data, then continue from the stored copy.
save "$gendatadir/canadaincomeprocessed.dta", replace
use "$gendatadir/canadaincomeprocessed.dta", clear

// Replace the day-month-year text in election_date with a Stata daily date of
// the same name (2015 is the latest year a two-digit year may resolve to).
generate edate = date(election_date ,"DMY", 2015)
drop election_date
rename edate election_date
format election_date %td

// Add the WTID series; rows that exist only in the WTID file are not kept.
merge 1:1 country year using "$gendatadir/wtid_processed.dta", keep(master match) nogenerate

tsset year
sort year

// 2011 takes the previous year's share5 value.
replace share5 = l.share5 if year == 2011

// Year-on-year growth rates, (x - l.x)/l.x, per income concept:
// quintile means 1-4, the average of quintile means 1 and 2, and the top-5% mean and threshold.
foreach conc in "mi" "at" "ti" {
  foreach q in 1 2 3 4 {
    gen inc_quin`q'_m_`conc'_g1 = (inc_`conc'_mean_`q' - l.inc_`conc'_mean_`q')/l.inc_`conc'_mean_`q'
  }
  gen inc_quin12_m_`conc' = (inc_`conc'_mean_1 + inc_`conc'_mean_2) / 2
  gen inc_quin12_m_`conc'_g1 = (inc_quin12_m_`conc' - l.inc_quin12_m_`conc') / l.inc_quin12_m_`conc'
  foreach top in mean thresh {
    gen inc_t5_`top'_`conc'_g1 = (inc_t5_`top'_`conc' - l.inc_t5_`top'_`conc') / l.inc_t5_`top'_`conc'
  }
}


// NOTE(review): these two locals are not referenced in the visible part of this file.
local incstub1 "at" // "mi" | "at" | "ti"
local incstub2 "at" // "mi" | "at" | "ti" + "cg" or not (for including capital gains where it's available)

// Growth rates of the inc_dist_* series: quintile upper limits, the average of
// the 2nd and 3rd upper limits, the ll_95 series and the mean.
foreach q in 1 2 3 4 {
  gen inc_dist_quin`q'_g1 = (inc_dist_quin`q'_ul - l.inc_dist_quin`q'_ul)/l.inc_dist_quin`q'_ul
}
gen inc_dist_quin23 = (inc_dist_quin2_ul + inc_dist_quin3_ul)/2
foreach part in quin23 ll_95 mean {
  gen inc_dist_`part'_g1 = (inc_dist_`part' - l.inc_dist_`part')/l.inc_dist_`part'
}

// Growth rates of the inc_distyb_at_* series.
***gen inc_distyb_at_ll_95_g1    = (inc_distyb_at_ll_95 - l.inc_distyb_at_ll_95)/l.inc_distyb_at_ll_95
foreach part in quin1 quin2 quin3 quin4 ll_95 mean {
  gen inc_distyb_at_`part'_g1 = (inc_distyb_at_`part' - l.inc_distyb_at_`part')/l.inc_distyb_at_`part'
}

// Overall mean per income concept (simple average of the five quintile means) and its growth rate.
foreach conc in at mi ti {
  gen inc_`conc'_mean = (inc_`conc'_mean_1 + inc_`conc'_mean_2 + inc_`conc'_mean_3 + inc_`conc'_mean_4 + inc_`conc'_mean_5)/5
  gen inc_`conc'_mean_g1 = (inc_`conc'_mean - l.inc_`conc'_mean)/(l.inc_`conc'_mean)
}


// Copy the 2010 observation from WTID into 2011, giving us an extra year of election coverage.
// This appears acceptable as the election date was 2011/05/02, meaning that most of the growth
// measure we would have used would have come directly from 2010, anyway.
replace wtid_incgrowthtop5 = l.wtid_incgrowthtop5 if year == 2011

/////////////// Section that chooses precisely which income measures get used.
// Each *_xx_g1 variable starts from the after-tax series and is then filled, where
// missing, from a fallback series. Commented lines are alternative choices.

// Mean income growth: after-tax mean, else WTID.
gen     inc_mean_xx_g1 = inc_at_mean_g1
*replace inc_mean_xx_g1  = inc_dist_mean_g1 if inc_mean_xx_g1 == .
*replace inc_mean_xx_g1  = inc_distyb_at_mean_g1 if inc_mean_xx_g1 == .
replace inc_mean_xx_g1 = wtid_incgrowthmean if inc_mean_xx_g1 == .

// Top-5% income growth: after-tax top-5% mean, else WTID.
*gen     inc_top5_xx_g1 = inc_t5_thresh_at_g1
gen     inc_top5_xx_g1 = inc_t5_mean_at_g1
*replace inc_top5_xx_g1 = inc_dist_ll_95_g1 if inc_top5_xx_g1 == .
*replace inc_top5_xx_g1 = inc_distyb_at_ll_95_g1 if inc_top5_xx_g1 == .
replace inc_top5_xx_g1 = wtid_incgrowthtop5 if inc_top5_xx_g1 == .

// Quintile growth: first choice is the after-tax quintile-mean growth rate.
foreach q in 1 2 3 4 12 {
  gen inc_quin`q'_xx_g1 = inc_quin`q'_m_at_g1
}
// Fallbacks from the inc_dist_* growth rates. Quintile 3 falls back to the
// quin23 series and the quin12 measure falls back to the quin1 series.
replace inc_quin1_xx_g1  = inc_dist_quin1_g1  if inc_quin1_xx_g1  == .
replace inc_quin2_xx_g1  = inc_dist_quin2_g1  if inc_quin2_xx_g1  == .
replace inc_quin3_xx_g1  = inc_dist_quin23_g1 if inc_quin3_xx_g1  == .
replace inc_quin4_xx_g1  = inc_dist_quin4_g1  if inc_quin4_xx_g1  == .
replace inc_quin12_xx_g1 = inc_dist_quin1_g1  if inc_quin12_xx_g1 == .
*replace inc_quin1_xx_g1 = inc_distyb_at_quin1_g1 if inc_quin1_xx_g1 == .
*replace inc_quin2_xx_g1 = inc_distyb_at_quin2_g1 if inc_quin2_xx_g1 == .
*replace inc_quin3_xx_g1 = inc_distyb_at_quin3_g1 if inc_quin3_xx_g1 == .
*replace inc_quin4_xx_g1 = inc_distyb_at_quin4_g1 if inc_quin4_xx_g1 == .
*replace inc_quin12_xx_g1 = inc_distyb_at_quin1_g1 if inc_quin12_xx_g1 == .


// Time-series plot of mean income growth (after-tax mean vs WTID), exported as PDF.
*tsline inc_at_mean_g1 wtid_incgrowthmean inc_distyb_at_mean_g1,     xtitle("Year") xlabel(1965(5)2015) graphregion(fcolor(white)) title("Mean income growth rates") legend(lab(1 "StatCan") lab(2 "WTID") lab(3 "Yearbooks"))
tsline inc_at_mean_g1 wtid_incgrowthmean, ///
  xtitle("Year") xlabel(1965(5)2015) graphregion(fcolor(white)) ///
  title("Mean income growth rates") ///
  legend(lab(1 "StatCan") lab(2 "WTID"))
graph export "$imagedir/ca_tslines_mean.pdf", replace
window manage close graph

// Same plot for top-5% income growth.
*tsline inc_t5_mean_at_g1 wtid_incgrowthtop5 inc_distyb_at_ll_95_g1, xtitle("Year") xlabel(1965(5)2015) graphregion(fcolor(white)) title("Top income growth rates")  legend(lab(1 "StatCan: T5M") lab(2 "WTID: T5M") lab(3 "Yearbooks: P95"))
tsline inc_t5_mean_at_g1 wtid_incgrowthtop5, ///
  xtitle("Year") xlabel(1965(5)2015) graphregion(fcolor(white)) ///
  title("Top income growth rates") ///
  legend(lab(1 "StatCan: T5M") lab(2 "WTID: T5M"))
graph export "$imagedir/ca_tslines_t5.pdf", replace
window manage close graph


// Weight = day of the year of the election divided by 365.
generate election_year_weight = doy(election_date)/365
// For every income, WTID-growth and share variable, keep the unweighted value in
// orig_<name>, then, where a weight exists, blend the current year (weight) with
// the previous year (1 - weight).
foreach series of varlist inc_* wtid_inc* share* {
  generate orig_`series' = `series'
  replace `series' = election_year_weight * orig_`series' + (1-election_year_weight) * l.orig_`series' ///
    if election_year_weight != .
  *replace `series' = l.`series' if halfyear(election_date) == 1
  *drop if year < 1967
}

//// Scatter plot of mean and t5 growth rates
// Do this before we drop the non-election years.
// macro_analysis flags the first observation of each year.
bysort year: generate macro_analysis = 1 if _n == 1
// Growth rates in percent for plotting.
generate inc_top5_xx_g1_100 = inc_top5_xx_g1 * 100
generate inc_mean_xx_g1_100 = inc_mean_xx_g1 * 100
// 45-degree line, hollow markers for rows without an election date, filled
// markers for rows with one.
twoway ///
  (function y=x, range(-10 10)) ///
  (scatter inc_top5_xx_g1_100 inc_mean_xx_g1_100 if election_date == ., msymbol(oh) yline(0, lstyle(dot)) xline(0, lstyle(dot))) ///
  (scatter inc_top5_xx_g1_100 inc_mean_xx_g1_100 if election_date != ., msymbol(o) mcolor(gs4)), ///
  scheme(s1mono) xtitle("Mean income growth (%)") ytitle("Top-5% income growth (%)") ///
  xlabel(-10(4)10) ylabel(-10(4)10) legend(off)
graph export "$imagedir/ca_scatter_mean_t5.pdf", replace
window manage close graph

// Correlation between top-5% and mean income growth, written to small .tex
// files as a number with two decimals. Writing stays best-effort (a failure does
// not stop the do-file), but a failure is now reported instead of being silent,
// and a file handle left open by an earlier interrupted run is closed first so
// that -file open- does not fail on it.

// All years.
cor inc_top5_xx_g1_100 inc_mean_xx_g1_100
local rho =  string(r(rho), "%9.2fc")
capture file close rhofile
capture {
  file open  rhofile using "$texdir/ca_rho_mean_t5.tex", write replace
  file write rhofile "`rho'"
  file close rhofile
}
if _rc != 0 {
  local rho_rc = _rc
  display as error "warning: could not write $texdir/ca_rho_mean_t5.tex (rc = `rho_rc')"
  capture file close rhofile
}

// Rows with an election date only.
cor inc_top5_xx_g1_100 inc_mean_xx_g1_100 if election_date != .
local rho =  string(r(rho), "%9.2fc")
capture file close rhofile
capture {
  file open  rhofile using "$texdir/ca_rho_mean_t5_eyears.tex", write replace
  file write rhofile "`rho'"
  file close rhofile
}
if _rc != 0 {
  local rho_rc = _rc
  display as error "warning: could not write $texdir/ca_rho_mean_t5_eyears.tex (rc = `rho_rc')"
  capture file close rhofile
}
